library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)
0 1 442 446
dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL
trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]
pander::pander(table(dataColonTrain$status))
| 0 | 1 |
|---|---|
| 308 | 313 |
pander::pander(table(dataColonTest$status))
| 0 | 1 |
|---|---|
| 134 | 133 |
ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 10)
[++++++++++++++++++++++++++++++++++-++-+++++++++-]….
sm <- summary(ml)
pander::pander(sm$coefficients)
| Estimate | lower | HR | upper | u.Accuracy | |
|---|---|---|---|---|---|
| extent | 4.23e-01 | 1.223 | 1.526 | 1.905 | 0.564 |
| node4 | 3.64e-01 | 1.133 | 1.439 | 1.828 | 0.593 |
| age_node4 | 1.29e-03 | 1.000 | 1.001 | 1.002 | 0.593 |
| rxLev_5FU_age | -4.81e-03 | 0.992 | 0.995 | 0.998 | 0.557 |
| rxLev_5FU | -5.97e-02 | 0.902 | 0.942 | 0.984 | 0.557 |
| nodes | 3.81e-02 | 1.010 | 1.039 | 1.068 | 0.601 |
| surg | 2.35e-01 | 1.053 | 1.265 | 1.519 | 0.541 |
| adhere | 5.21e-07 | 1.000 | 1.000 | 1.000 | 0.528 |
| age_nodes | 1.44e-04 | 1.000 | 1.000 | 1.000 | 0.605 |
| age_surg | 2.03e-08 | 1.000 | 1.000 | 1.000 | 0.541 |
| age_adhere | 4.25e-09 | 1.000 | 1.000 | 1.000 | 0.528 |
| age_extent | 2.18e-09 | 1.000 | 1.000 | 1.000 | 0.528 |
| r.Accuracy | full.Accuracy | u.AUC | r.AUC | full.AUC | |
|---|---|---|---|---|---|
| extent | 0.611 | 0.621 | 0.561 | 0.612 | 0.622 |
| node4 | 0.641 | 0.621 | 0.594 | 0.641 | 0.622 |
| age_node4 | 0.591 | 0.607 | 0.594 | 0.592 | 0.609 |
| rxLev_5FU_age | 0.609 | 0.621 | 0.556 | 0.609 | 0.622 |
| rxLev_5FU | 0.589 | 0.607 | 0.556 | 0.591 | 0.609 |
| nodes | 0.617 | 0.621 | 0.602 | 0.618 | 0.622 |
| surg | 0.631 | 0.620 | 0.543 | 0.632 | 0.621 |
| adhere | 0.504 | 0.528 | 0.531 | 0.500 | 0.531 |
| age_nodes | 0.588 | 0.607 | 0.607 | 0.590 | 0.609 |
| age_surg | 0.504 | 0.541 | 0.543 | 0.500 | 0.543 |
| age_adhere | 0.504 | 0.528 | 0.531 | 0.500 | 0.531 |
| age_extent | 0.504 | 0.528 | 0.528 | 0.500 | 0.528 |
| IDI | NRI | z.IDI | z.NRI | Delta.AUC | Frequency | |
|---|---|---|---|---|---|---|
| extent | 0.01708 | 0.237 | 3.74 | 4.20 | 0.00981 | 1.0 |
| node4 | 0.00769 | 0.340 | 2.99 | 4.99 | -0.01925 | 1.0 |
| age_node4 | 0.00702 | 0.314 | 2.94 | 4.64 | 0.01675 | 1.0 |
| rxLev_5FU_age | 0.00924 | 0.223 | 2.89 | 2.99 | 0.01297 | 1.0 |
| rxLev_5FU | 0.00797 | 0.223 | 2.68 | 2.99 | 0.01726 | 1.0 |
| nodes | 0.00508 | 0.195 | 2.65 | 2.46 | 0.00435 | 1.0 |
| surg | 0.00703 | 0.171 | 2.53 | 2.39 | -0.01111 | 1.0 |
| adhere | 0.00461 | 0.125 | 2.30 | 2.31 | 0.03114 | 0.7 |
| age_nodes | 0.00439 | 0.135 | 2.28 | 1.70 | 0.01881 | 1.0 |
| age_surg | 0.00594 | 0.171 | 2.24 | 2.39 | 0.04284 | 0.9 |
| age_adhere | 0.00363 | 0.125 | 2.07 | 2.31 | 0.03114 | 0.4 |
| age_extent | 0.00482 | 0.111 | 1.93 | 1.39 | 0.02777 | 0.4 |
Here we evaluate the model using the RRPlot() function.
Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years
index <- predict(ml,dataColonTrain)
timeinterval <- 2*mean(subset(dataColonTrain,status==1)$time)
h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))
rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
timetoEvent=dataColonTrain$time,
title="Raw Train: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
| @:0.9 | @MAX_BACC | @MAX_RR | @SPE100 | p(0.5) | |
|---|---|---|---|---|---|
| Thr | 0.517 | 0.328 | 0.229 | 0.11697 | 0.496 |
| RR | 1.606 | 1.991 | 3.903 | 1.00000 | 1.607 |
| SEN | 0.272 | 0.757 | 0.984 | 1.00000 | 0.278 |
| SPE | 0.896 | 0.539 | 0.104 | 0.00649 | 0.893 |
| BACC | 0.584 | 0.648 | 0.544 | 0.50325 | 0.585 |
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
| O/E | Low | Upper | p.value |
|---|---|---|---|
| 0.971 | 0.866 | 1.08 | 0.636 |
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.44 | 1.44 | 1.42 | 1.47 |
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.29 | 1.29 | 1.29 | 1.3 |
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.654 | 0.654 | 0.624 | 0.687 |
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.688 | 0.646 | 0.729 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.272 | 0.223 | 0.324 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.899 | 0.86 | 0.931 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
| 90% |
|---|
| 0.518 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.61 | 1.39 | 1.86 |
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 505 | 228 | 271.6 | 6.99 | 53.3 |
| class=1 | 116 | 85 | 41.4 | 45.80 | 53.3 |
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
| h0 | Gain | DeltaTime |
|---|---|---|
| 0.681 | 1.5 | 3.1 |
h0 <- calprob$h0
timeinterval <- calprob$timeInterval;
rdata <- cbind(dataColonTrain$status,calprob$prob)
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
timetoEvent=dataColonTrain$time,
title="Calibrated Train: Colon",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
| @:0.9 | @MAX_BACC | @MAX_RR | @SPE100 | p(0.5) | |
|---|---|---|---|---|---|
| Thr | 0.664 | 0.449 | 0.323 | 0.16999 | 0.499 |
| RR | 1.606 | 1.991 | 3.903 | 1.00000 | 1.576 |
| SEN | 0.272 | 0.757 | 0.984 | 1.00000 | 0.502 |
| SPE | 0.896 | 0.539 | 0.104 | 0.00649 | 0.724 |
| BACC | 0.584 | 0.648 | 0.544 | 0.50325 | 0.613 |
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
| O/E | Low | Upper | p.value |
|---|---|---|---|
| 0.794 | 0.708 | 0.887 | 2.57e-05 |
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 0.998 | 0.998 | 0.982 | 1.01 |
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.01 | 1.01 | 1.01 | 1.01 |
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.654 | 0.654 | 0.623 | 0.686 |
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.688 | 0.646 | 0.729 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.272 | 0.223 | 0.324 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.899 | 0.86 | 0.931 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
| 90% |
|---|
| 0.665 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.61 | 1.39 | 1.86 |
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 505 | 228 | 271.6 | 6.99 | 53.3 |
| class=1 | 116 | 85 | 41.4 | 45.80 | 53.3 |
The calibrated h0 and timeinterval were estimated on the training set
index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))
rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
timetoEvent=dataColonTest$time,
title="Test: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
| @:0.66 | @MAX_BACC | @MAX_RR | @SPE100 | p(0.5) | |
|---|---|---|---|---|---|
| Thr | 0.664 | 0.496 | 0.393 | 0.19235 | 0.499 |
| RR | 1.758 | 2.028 | 2.480 | 1.00000 | 1.994 |
| SEN | 0.278 | 0.609 | 0.887 | 1.00000 | 0.594 |
| SPE | 0.918 | 0.739 | 0.366 | 0.00746 | 0.746 |
| BACC | 0.598 | 0.674 | 0.626 | 0.50373 | 0.670 |
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
| O/E | Low | Upper | p.value |
|---|---|---|---|
| 0.792 | 0.663 | 0.939 | 0.00611 |
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.04 | 1.04 | 1.02 | 1.06 |
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.04 | 1.04 | 1.03 | 1.04 |
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.688 | 0.688 | 0.644 | 0.729 |
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.72 | 0.658 | 0.781 |
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.271 | 0.197 | 0.355 |
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.918 | 0.858 | 0.958 |
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
| 90% |
|---|
| 0.665 |
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.76 | 1.42 | 2.18 |
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 220 | 97 | 117.7 | 3.64 | 32.1 |
| class=1 | 47 | 36 | 15.3 | 28.04 | 32.1 |
Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process
rcv <- randomCV(theData=dataColonTrain,
theOutcome = Surv(time,status)~1,
fittingFunction=BSWiMS.model,
trainFraction = 0.75,
repetitions=50,
classSamplingType = "Pro",
testingSet=dataColonTest
)
.[+++++].[+++++].[+++++++].[+++++].[+++].[+++++].[++++-].[++++-].[++++++++].[++++–]10 Tested: 852 Avg. Selected: 8.5 Min Tests: 1 Max Tests: 10 Mean Tests: 4.964789 . MAD: 0.4756285
.[++++++].[+++-].[++-].[+++].[++++-].[+++-].[++++++].[++–].[+++++++++].[+++++]20 Tested: 888 Avg. Selected: 8.3 Min Tests: 1 Max Tests: 20 Mean Tests: 9.527027 . MAD: 0.4763477
.[++-].[+++].[++–].[++++-].[++++].[++++].[+++++].[++++++++++]..[+++++].[+++–]30 Tested: 888 Avg. Selected: 8.066667 Min Tests: 2 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4765761
.[+++-].[++++++].[++++++++].[++-].[++++-].[+++].[++++].[+++].[+++++++-].[+++++]40 Tested: 888 Avg. Selected: 8.35 Min Tests: 3 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.4764345
.[+-++-].[++–].[+++–].[++-].[+++++–].[++].[++++].[++–].[+++-].[++++++–]50 Tested: 888 Avg. Selected: 8.12 Min Tests: 4 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4764308
stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]
bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)
rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names
rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
timetoEvent=times,
title="CV Test: Colon Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)
pander::pander(t(rrAnalysisCVTest$keyPoints),caption="Threshold values")
| @:0.66 | @MAX_BACC | @MAX_RR | @SPE100 | p(0.5) | |
|---|---|---|---|---|---|
| Thr | 0.665 | 0.478 | 0.383 | 0.3103 | 0.500 |
| RR | 1.644 | 1.693 | 2.481 | 45.6655 | 1.597 |
| SEN | 0.184 | 0.603 | 0.973 | 1.0000 | 0.482 |
| SPE | 0.943 | 0.658 | 0.102 | 0.0204 | 0.747 |
| BACC | 0.564 | 0.631 | 0.537 | 0.5102 | 0.614 |
pander::pander(t(rrAnalysisCVTest$OERatio$estimate),caption="O/E Ratio")
| O/E | Low | Upper | p.value |
|---|---|---|---|
| 0.769 | 0.699 | 0.844 | 7.64e-09 |
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 0.995 | 0.995 | 0.984 | 1.01 |
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Mean")
| mean | 50% | 2.5% | 97.5% |
|---|---|---|---|
| 1.02 | 1.02 | 1.02 | 1.03 |
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
| mean.C Index | median | lower | upper |
|---|---|---|---|
| 0.648 | 0.648 | 0.624 | 0.672 |
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
| est | lower | upper |
|---|---|---|
| 0.669 | 0.634 | 0.705 |
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
| est | lower | upper |
|---|---|---|
| 0.184 | 0.149 | 0.223 |
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
| est | lower | upper |
|---|---|---|
| 0.943 | 0.918 | 0.963 |
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
| 90% |
|---|
| 0.665 |
pander::pander(t(rrAnalysisCVTest$RR_atP),caption="Risk Ratio")
| est | lower | upper |
|---|---|---|
| 1.64 | 1.45 | 1.87 |
pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
| N | Observed | Expected | (O-E)^2/E | (O-E)^2/V | |
|---|---|---|---|---|---|
| class=0 | 781 | 364 | 409.5 | 5.05 | 62.1 |
| class=1 | 107 | 82 | 36.5 | 56.65 | 62.1 |